Introduction

This is a study of time series data provided by Asset Mapping.

Data

The data source is an SQL output file, named sensors.sql in the data directory. The format of the data is illustrated by this example:

      object_name       | object_name |         time_stamp         | value 
------------------------+-------------+----------------------------+-------
 London 70bf78cf (None) | NoiseAvg    | 2017-02-02 15:16:04        |  50.9
 London 9c850e0a (None) | NoisePeak   | 2017-03-22 03:46:53        |  36.4

The data starts at line 3. Observation fields are delimted by a |. The first column is IC_meter. The second column is value_type, which we analyse below. The time_stamp and value columns are what they are.

TODO: Query data from source with SQL.

Importing Data

#Import all source files in the data directory and store them in a matrix, where the row name is the file name
data_files <- list.files(path = "data", pattern = "\\.sql\\.gz$")
#The columns are the source data set, and the unique lists of meters and value_types in that data set
sensors_df_mtrx <- matrix(list(), nrow = length(data_files), ncol = 3)
rownames(sensors_df_mtrx) <- data_files
#Set column names explicitly as the column headings in the source are not what we need
column_names <- c("IC_meter", "value_type", "time_stamp", "value")
row <- 1
for (file in data_files) {
  #Import data, skipping header rows and without factors for now, so that the data types can be more easily converted
  sensors_df_mtrx[[row, 1]] <- read.delim(paste("data/", file, sep = ""), sep="|", skip = 2, col.names = column_names, header = FALSE, stringsAsFactors = FALSE, strip.white = TRUE) 
  #Delete last row if the values are null.
  nrow_sensor_df <- nrow(sensors_df_mtrx[[row, 1]])
  if (sensors_df_mtrx[[row, 1]][nrow_sensor_df, 2] == "") {
    sensors_df_mtrx[[row, 1]] <- sensors_df_mtrx[[row, 1]][-nrow_sensor_df,]
  }
  #Convert time_stamp string to POSIXct, note that the milliseconds are retained with the %OS format. To see milliseconds when printing, use:
  #op <- options(digits.secs=6) 
  sensors_df_mtrx[[row, 1]]$time_stamp <- as.POSIXct(sensors_df_mtrx[[row, 1]]$time_stamp, tz = "UTC", format = "%Y-%m-%d %H:%M:%OS", usetz = TRUE)
  row <- row + 1
}

Subset Data

#Process sensors_df_mtrx to subset each source data frame into dataframes for each meter, 
#and then replace the source data with the matrix of dataframe subsets
for (row in 1:nrow(sensors_df_mtrx)) {
  sensor_df <- sensors_df_mtrx[[row, 1]]
  ic_meters <- unique(sensor_df$IC_meter)
  value_types <- unique(sensor_df$value_type)
  #The matrix will hold a dataframe subset for each IC_meter and value_type, with the name of each row being the IC_meter, and the column the value_type
  meter_value_type_subsets_mtrx <- matrix(list(), nrow = length(ic_meters), ncol = length(value_types))
  rownames(meter_value_type_subsets_mtrx) <- ic_meters
  colnames(meter_value_type_subsets_mtrx) <- value_types
  col_counter <- 1
  row_counter <- 1
  for (icm in ic_meters) {
    for (vt in value_types) {
      meter_value_type_subsets_mtrx[[row_counter, col_counter]] <- subset(sensor_df, (IC_meter == icm & value_type == vt), select = -c(IC_meter, value_type))
      col_counter <- col_counter + 1
    }
    col_counter <- 1
    row_counter <- row_counter + 1
  }
  #Replace the original dataframe with the matrix of dataframe subsets
  sensors_df_mtrx[[row, 1]] <- meter_value_type_subsets_mtrx
  #Record the unique meter names and value_types
  sensors_df_mtrx[[row, 2]] <- ic_meters
  sensors_df_mtrx[[row, 3]] <- value_types
}

Data Exploration

line_plot_gatherer <- htmltools::tagList()
plot_counter <- 0
for (row in 1:nrow(sensors_df_mtrx)) {
  #Add the name of the source file
  line_plot_gatherer[[(plot_counter <- plot_counter + 1)]] <- htmltools::h1(paste("Source file -", rownames(sensors_df_mtrx)[row])) 
  meter_value_type_subsets_mtrx <- sensors_df_mtrx[[row, 1]]
  ic_meters <- sensors_df_mtrx[[row, 2]]
  value_types <- sensors_df_mtrx[[row, 3]]
  col_counter <- 1
  row_counter <- 1
  for (icm in ic_meters) {
    for (vt in value_types) {
      plot_df <- meter_value_type_subsets_mtrx[[row_counter, col_counter]]
      if (nrow(plot_df) > 0) {
        line_plot_gatherer[[(plot_counter <- plot_counter + 1)]] <- plot_ly(plot_df, x = ~time_stamp, y = ~value, type = "scatter", mode = "lines") %>% 
          layout(title = paste("Meter -", icm, "- Value Type -", vt))
        #Put some space between the plots
        line_plot_gatherer[[(plot_counter <- plot_counter + 1)]] <- htmltools::h1("") 
        col_counter <- col_counter + 1
      }
    }
    col_counter <- 1
    row_counter <- row_counter + 1
  }
}
line_plot_gatherer

Source file - 2_sensors_Dec_11_2016-Mar_26_2017.sql.gz

Source file - 2_sensors_sensor_type_1.sql.gz

Source file - 8_sensors_sensor_type_2.sql.gz

OpenTSDB Format

This section writes data for import into OpenTSDB, which could then be used with Grafana, for example.

The data format, from here is:

<metric> <timestamp> <value> <tagk=tagv> [<tagkN=tagvN>]

The data has to be made available on a file system that the OpenTSDB import command can see, and can then be imported like this:

import timeseries1.gz timeseries2.gz
for (row in 1:nrow(sensors_df_mtrx)) {
  meter_value_type_subsets_mtrx <- sensors_df_mtrx[[row, 1]]
  ic_meters <- sensors_df_mtrx[[row, 2]]
  value_types <- sensors_df_mtrx[[row, 3]]
  col_counter <- 1
  row_counter <- 1
  for (icm in ic_meters) {
    for (vt in value_types) {
      export_df <- meter_value_type_subsets_mtrx[[row_counter, col_counter]]
      
    }
  }
  }